# Load the trees data from data/sf_trees/sf_trees.csv (path built with here()).
# read_csv() is preferred over base read.csv() because it offers more parsing
# options; show_col_types = FALSE turns off the column-type message.
sf_trees <- here("data", "sf_trees", "sf_trees.csv") %>%
  read_csv(show_col_types = FALSE)

# Always inspect the data first: summary() gives a per-column overview
# (range/quartiles and NA counts for numeric and date columns, length and
# class for character columns).
# NOTE(review): the printed summary shows extremes far from the quartiles
# (dbh max of 9999, latitude max of 47.27, longitude min of -138.3);
# presumably placeholder or erroneous entries - confirm before analysis.
summary(sf_trees)
##     tree_id       legal_status         species            address         
##  Min.   :     1   Length:192987      Length:192987      Length:192987     
##  1st Qu.: 52602   Class :character   Class :character   Class :character  
##  Median :120862   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :126529                                                           
##  3rd Qu.:202608                                                           
##  Max.   :261546                                                           
##                                                                           
##    site_order       site_info          caretaker              date           
##  Min.   :-50.000   Length:192987      Length:192987      Min.   :1955-09-19  
##  1st Qu.:  1.000   Class :character   Class :character   1st Qu.:1995-01-09  
##  Median :  2.000   Mode  :character   Mode  :character   Median :2001-06-12  
##  Mean   :  4.579                                         Mean   :2000-10-12  
##  3rd Qu.:  4.000                                         3rd Qu.:2008-09-25  
##  Max.   :501.000                                         Max.   :2020-01-25  
##  NA's   :1634                                            NA's   :124610      
##       dbh           plot_size            latitude       longitude     
##  Min.   :   0.00   Length:192987      Min.   :37.51   Min.   :-138.3  
##  1st Qu.:   3.00   Class :character   1st Qu.:37.74   1st Qu.:-122.5  
##  Median :   7.00   Mode  :character   Median :37.76   Median :-122.4  
##  Mean   :   9.95                      Mean   :37.77   Mean   :-122.4  
##  3rd Qu.:  12.00                      3rd Qu.:37.78   3rd Qu.:-122.4  
##  Max.   :9999.00                      Max.   :47.27   Max.   :-122.4  
##  NA's   :41819                        NA's   :2832    NA's   :2832
# List the column names available in sf_trees
names(sf_trees)
##  [1] "tree_id"      "legal_status" "species"      "address"      "site_order"  
##  [6] "site_info"    "caretaker"    "date"         "dbh"          "plot_size"   
## [11] "latitude"     "longitude"
# Top 5 most common legal statuses and the number of trees in each.
# Note: slice_max() keeps ties by default, so more than 5 rows can come back
# if several statuses share the 5th-largest count.
top_5_status <- sf_trees %>%
  count(legal_status, name = "tree_count") %>% # one row per status + its tally
  slice_max(tree_count, n = 5) %>% # keep only the 5 largest counts
  arrange(desc(tree_count)) # order from most to fewest trees